import csv
import json
import os
import sys
import time
import urllib
import urllib.parse

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Page opened first so the API request below is issued from the site's own origin.
_SEARCH_PAGE_URL = "https://www.zhipin.com/web/geek/job?query=java&city=101040100&page=1"

# JSON endpoint queried from inside the browser session.
_JOB_LIST_API_URL = "https://www.zhipin.com/wapi/zpgeek/search/joblist.json"

# Output files (relative to the current working directory).
_JSON_OUTPUT_PATH = '../job_data.json'
_CSV_OUTPUT_PATH = '../job_list.csv'

# Columns written to the CSV file, in order.
_CSV_FIELDNAMES = [
    'jobName', 'salaryDesc', 'jobExperience', 'jobDegree', 'cityName',
    'areaDistrict', 'businessDistrict', 'brandName', 'brandIndustry',
    'brandScaleName', 'welfareList', 'skills'
]

# Synchronous XHR executed in the page; the request URL is passed as
# arguments[0] so it never has to be spliced into the JavaScript source.
# NOTE(review): browsers may refuse to set User-Agent/Referer from script;
# the calls are kept as in the original request - confirm whether they matter.
_FETCH_SCRIPT = """
    var xhr = new XMLHttpRequest();
    xhr.open('GET', arguments[0], false);
    xhr.setRequestHeader('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36');
    xhr.setRequestHeader('Referer', 'https://www.zhipin.com/web/geek/job?query=java&city=101040100&page=1');
    xhr.send(null);
    return xhr.responseText;
"""


def _build_search_url():
    """Return the job-list API URL including its URL-encoded query string."""
    params = {
        "scene": "1",
        "query": "java",
        "city": "101040100",
        "experience": "",
        "payType": "",
        "partTime": "",
        "degree": "",
        "industry": "",
        "scale": "",
        "stage": "",
        "position": "",
        "jobType": "",
        "salary": "",
        "multiBusinessDistrict": "",
        "multiSubway": "",
        "page": "1",
        "pageSize": "30"
    }
    return f"{_JOB_LIST_API_URL}?{urllib.parse.urlencode(params)}"


def _fetch_job_data(driver):
    """Load the search page in *driver*, request the job list and return the parsed JSON."""
    driver.get(_SEARCH_PAGE_URL)

    # Give the page time to finish loading before issuing the XHR.
    time.sleep(5)

    response = driver.execute_script(_FETCH_SCRIPT, _build_search_url())
    return json.loads(response)


def _join_list(values):
    """Join a list-valued field into one comma-separated string ('' if missing or None)."""
    return ', '.join(str(value) for value in (values or []))


def _job_to_row(job):
    """Build the CSV row for one job entry without mutating *job*."""
    row = {field: job.get(field, '') for field in _CSV_FIELDNAMES}
    # These two fields are lists in the payload and are flattened for CSV.
    row['welfareList'] = _join_list(job.get('welfareList'))
    row['skills'] = _join_list(job.get('skills'))
    return row


def _write_csv(job_list, path):
    """Write *job_list* to *path* as CSV, with a header row."""
    # utf-8-sig writes a BOM at the start of the file.
    with open(path, 'w', newline='', encoding='utf-8-sig') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=_CSV_FIELDNAMES)
        writer.writeheader()
        writer.writerows(_job_to_row(job) for job in job_list)


def main():
    """Fetch the job list through a Chrome session and save it as JSON and CSV.

    If the response's ``code`` field is not 0 the whole program is restarted
    via ``os.execv``. Otherwise the raw response is written to
    ``../job_data.json`` and the entries under ``zpData.jobList`` are written
    to ``../job_list.csv``.

    Raises:
        json.JSONDecodeError: if the endpoint returns a non-JSON body.
        OSError: if the output files cannot be written or the restart fails.
    """
    # Set up the Chrome driver.
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service)
    try:
        data = _fetch_job_data(driver)
    finally:
        # Always close the browser, even when the fetch or JSON parsing fails.
        driver.quit()
    print(data)

    # A non-zero code is treated as a failed fetch: restart the program.
    # NOTE: there is no retry limit, so a persistent failure restarts forever.
    if data.get('code') != 0:
        print("Code is not 0, restarting the program...")
        # execv replaces the process without flushing stdio buffers.
        sys.stdout.flush()
        sys.stderr.flush()
        os.execv(sys.executable, ['python'] + sys.argv)

    # Save the raw response.
    with open(_JSON_OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

    print("数据已保存到 job_data.json 文件中")

    # Tolerate a missing or null payload instead of raising KeyError/TypeError.
    job_list = (data.get('zpData') or {}).get('jobList') or []
    _write_csv(job_list, _CSV_OUTPUT_PATH)

    print("数据已保存到 job_list.csv 文件中")

# Run the scraper only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
